Import data
# Load the three input tables from ./datasets: the county/candidate results
# file, the state-level file, and the state lookup that carries `Region`.
vote_df <- read_csv("./datasets/president_county_candidate.csv")
state_sum <- read_csv("./datasets/president_state.csv")

# The lookup's `State` column is renamed to lower-case `state` so it shares a
# key name with the election tables, which are grouped and joined on `state`.
region_df <- rename(read_csv("./datasets/states.csv"), state = State)
Merge and tidy the data
# Per-state vote counts for the two major candidates.
#
# Reuses the already-loaded `vote_df` and `state_sum` instead of parsing both
# CSV files a second time.
#
# Columns produced:
#   state_votes    - votes for one candidate in one state
#   state_subtotal - Biden + Trump votes in that state (other candidates are
#                    filtered out before this sum is taken)
#
# merge() with no `by` joins on every column name the two tables share, and
# all = TRUE keeps unmatched rows from both sides.
# NOTE(review): rows that exist only in `state_sum` would carry an NA
# candidate and be removed by the filter below; confirm whether this merge
# contributes anything beyond `vote_df` itself.
election_df <-
  merge(vote_df, state_sum, all = TRUE) %>%
  group_by(state, candidate) %>%
  summarise(state_votes = sum(total_votes)) %>%
  filter(candidate %in% c("Joe Biden", "Donald Trump")) %>%
  group_by(state) %>%
  mutate(state_subtotal = sum(state_votes))

# Attach each state's region and compute the candidate's share of the
# two-candidate subtotal, rounded to two decimals. merge() defaults to an
# inner join, so states absent from `region_df` are dropped here.
election_by_region <-
  merge(election_df, region_df, by = "state") %>%
  select(-`State Code`) %>%
  mutate(prop_votes = round(state_votes / state_subtotal, 2))
Plot each candidate's vote proportion by state
# Fill colours keyed by candidate name, used by scale_fill_manual().
colors <- c("Donald Trump" = "red", "Joe Biden" = "blue")

# Build a dodged bar chart of each candidate's vote proportion per state for
# one region of `election_by_region`.
#
# region_name: value of the `Region` column to keep; also used as the x-axis
#              label.
# Returns a ggplot object.
plot_region_proportions <- function(region_name) {
  election_by_region %>%
    filter(Region == region_name) %>%
    ggplot(aes(x = state, y = prop_votes, fill = candidate)) +
    geom_bar(stat = "identity", position = position_dodge()) +
    labs(x = region_name, y = "Vote Proportion") +
    scale_fill_manual(values = colors) +
    # Rotate the state names so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}

plot_1 <- plot_region_proportions("Midwest")
plot_2 <- plot_region_proportions("West")
plot_3 <- plot_region_proportions("South")
# The y label now reads "Vote Proportion" here too; this panel previously said
# "Votes Proportion", inconsistent with the other three.
plot_4 <- plot_region_proportions("Northeast")

# The panels are arranged by combining the ggplot objects with `+` and
# plot_layout(); the former par(mfrow = c(2, 2)) call only affects base
# graphics and had no effect on this figure, so it has been removed.
plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(guides = "collect")

# Region-level vote share for each candidate, shown as an interactive bar
# chart.
#
# Fixes:
#   * The `+` after scale_fill_manual() was missing, so the theme() call was
#     evaluated as a separate statement (its structure was printed to the
#     output) and the axis-text rotation was never applied to `main_plot`.
#   * The regional figures were computed with mutate(), which kept one row per
#     state and drew every regional bar once per state on top of itself.
#     summarise() now yields exactly one row per Region/candidate pair.
#
# region_votes: the candidate's votes summed over the region's states.
# region_total: the two-candidate subtotals summed over the same states.
main_plot <-
  election_by_region %>%
  group_by(Region, candidate) %>%
  summarise(
    region_votes = sum(state_votes),
    region_total = sum(state_subtotal)
  ) %>%
  ungroup() %>%
  mutate(region_prop = round(region_votes / region_total, 2)) %>%
  ggplot(aes(x = Region, y = region_prop, fill = candidate,
             # `text` is not drawn by ggplot; it feeds the plotly tooltip.
             text = paste("Region: ", Region,
                          "</br></br>Proportion ", region_prop,
                          "</br>Candidate: ", candidate))) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "Proportion of Votes in Four Main Regions",
       x = "Region",
       y = "Vote Proportion") +
  scale_fill_manual(values = colors) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Convert to an interactive widget whose hover box shows only the `text`
# aesthetic built above.
ggplotly(main_plot, tooltip = "text")
Bar plot of total votes in each state, segmented by region
# Prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Build a bar chart of votes per state (in millions) for one region of
# `election_by_region`.
#
# region_name: value of the `Region` column to keep; also used as the x-axis
#              label.
# bar_fill:    constant fill colour for the bars.
# Returns a ggplot object.
#
# NOTE(review): `state_subtotal` is the Biden + Trump subtotal, not the count
# over all candidates, so the "Total Votes (M)" label is a two-candidate
# total; confirm this is intended.
plot_region_totals <- function(region_name, bar_fill) {
  election_by_region %>%
    filter(Region == region_name) %>%
    # `election_by_region` holds one row per candidate per state, each with
    # the same subtotal; keep one row per state so every bar is drawn once
    # rather than twice on top of itself.
    distinct(state, state_subtotal) %>%
    ggplot(aes(x = state, y = state_subtotal / 1000000)) +
    geom_bar(stat = "identity", position = position_dodge(),
             fill = bar_fill) +
    labs(x = region_name, y = "Total Votes (M)") +
    # Rotate the state names so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}

total_votes_p1 <- plot_region_totals("Midwest", "red")
total_votes_p2 <- plot_region_totals("South", "blue")
total_votes_p3 <- plot_region_totals("West", "red")
total_votes_p4 <- plot_region_totals("Northeast", "blue")

# The panels are combined with `+` on the ggplot objects; the former
# par(mfrow = c(2, 2)) call only affects base graphics and had no effect on
# this figure, so it has been removed.
total_votes_p1 + total_votes_p2 + total_votes_p3 + total_votes_p4

Clean Data Specifying Winner
# Flag, for every row of the county/candidate table, whether that row's party
# has the highest vote total in its state.
#
# Reuses the already-loaded `vote_df` (read from the same CSV) instead of
# parsing the file again.
#
# Columns added:
#   party_total  - votes summed over the state for the row's party
#   state_winner - TRUE when party_total equals the state's maximum
#   state_total  - votes summed over every row of the state
#
# The result stays grouped by `state`, as before.
election_winner_df <-
  vote_df %>%
  group_by(state, party) %>%
  mutate(party_total = sum(total_votes)) %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed.
  group_by(state) %>%
  mutate(
    # A plain comparison yields the same TRUE/FALSE/NA values as the earlier
    # two-branch case_when().
    state_winner = party_total == max(party_total),
    state_total = sum(total_votes)
  )

# One row per winning candidate per state, with the state's total votes and
# its region. The join key is spelled out rather than left to an implicit
# natural join.
# NOTE(review): assumes `state` is the only column the two tables share.
winner_region <-
  left_join(election_winner_df, region_df, by = "state") %>%
  filter(state_winner) %>%
  select(state, candidate, state_total, Region) %>%
  distinct()
Bar plot of total votes for each state in the four regions, colored by winner
# Fill colours keyed by candidate name, used by scale_fill_manual().
colors <- c("Donald Trump" = "red", "Joe Biden" = "blue")

# Bar chart of total votes per state (in millions) for one region of
# `winner_region`, with each bar filled by the candidate on that row.
#
# region_name: value of the `Region` column to keep; also used as the x-axis
#              label.
# Returns a ggplot object.
plot_winner_totals <- function(region_name) {
  region_rows <- filter(winner_region, Region == region_name)

  ggplot(
    region_rows,
    aes(x = state, y = state_total/1000000, fill = candidate)
  ) +
    geom_bar(stat = "identity", position = position_dodge()) +
    labs(x = region_name, y = "Total Votes (M)") +
    scale_fill_manual(values = colors) +
    # Rotate the state names so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}

Midwest_votes_p <- plot_winner_totals("Midwest")
West_votes_p <- plot_winner_totals("West")
South_votes_p <- plot_winner_totals("South")
Northeast_votes_p <- plot_winner_totals("Northeast")

# Combine the four panels and share a single legend between them.
Midwest_votes_p + West_votes_p + South_votes_p + Northeast_votes_p +
  plot_layout(guides = "collect")

Plot the map with the election result.
# Fill colours keyed by candidate name, used by scale_fill_manual().
colors <- c("Donald Trump" = "red", "Joe Biden" = "blue")

# Aesthetic mapping for the map polygons. `text` is not drawn by ggplot; it
# supplies the hover text shown by ggplotly() below.
# NOTE(review): `usa_election_map` is created outside this section; it is
# expected to provide long, lat, group, state, candidate, party_total and
# state_total.
map_aes <- aes(
  x = long, y = lat,
  group = group, fill = candidate,
  text = paste("State: ", state ,
               "</br></br>Candidate: ", candidate,
               "</br>Votes: ", party_total,
               "</br>Winning Proportion: ", round(party_total/state_total, 2))
)

# Axis elements to blank out, plus the legend placement.
map_theme <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  legend.position = "bottom"
)

# Map filled by the `candidate` column, with thin light-grey polygon borders.
election_result_map <-
  ggplot(usa_election_map, map_aes) +
  geom_polygon(color = "gray90", size = 0.1) +
  labs(title = "Election Results across states") +
  scale_fill_manual(values = colors) +
  theme_void() +
  map_theme

# Static version.
election_result_map

# Interactive version whose tooltip shows only the `text` aesthetic.
ggplotly(election_result_map, tooltip = "text")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.